2024 Daiichi DS-1062a

bulk RNA-seq

library(dplyr)
library(ggplot2)
library(DT)
dir = "~/Desktop/DF/DFCI_Paweletz/2024_Daiichi_bulk_RNAseq/"

30348 bulk RNA-seq

count_TPM_matrix_all = readRDS(paste0(dir,"raw_data/30348/star_readspergeneoutP30348.rawData.count.tpm.rds"))
counts = count_TPM_matrix_all$count

# Remove genes with no expression across samples 
counts = counts[rowSums(counts) !=0,]
# counts %>% nrow() 
tpms = count_TPM_matrix_all$tpm

# match the rownames with counts 
tpms = tpms[rownames(counts),]
# tpms %>% nrow() # 22845

DEG analysis

List

  • Dxd/DMSO
  • IgG-Dxd/DMSO
  • DS-1062a/DMSO

DxD vs DMSO

library(DESeq2)
count.mtx = counts
cont = "DMSO"
tret = "DXD"
cols = c(c(1:3), c(4:6))
cond = c(rep(cont,3),
         rep(tret,3))
count.mtx = count.mtx[, cols]
# Generate info table
info <- data.frame(matrix(nrow = ncol(count.mtx), ncol = 2))
colnames(info) <- c('sample', 'cond')
info$sample <- colnames(count.mtx)
info$cond <- cond

# DESeq
dds <- DESeqDataSetFromMatrix(count.mtx, info, ~ cond)
dds <- DESeq(dds)
res <- results(dds)

res <- data.frame(res)

Volcano plot (version1)

fc = 1.5
pval = 0.05

res = res %>% mutate(DE=ifelse(log2FoldChange >= log2(fc) & pvalue < pval, 'UP',
                               ifelse(log2FoldChange <= -log2(fc) & pvalue < pval, 'DN','no_sig')))
res$DE = factor(res$DE, levels = c('UP','DN','no_sig'))
res1= res

res %>% 
  ggplot(aes(log2FoldChange, -log10(pvalue))) + 
  geom_point(size=1, alpha=0.5) + 
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_classic() +
  geom_vline(xintercept = c(-log2(fc),log2(fc)), color='grey') +
  geom_hline(yintercept = -log10(0.05),color='grey') 

Volcano plot (version2)

up = nrow(res[res$DE == "UP", ])
dn = nrow(res[res$DE == "DN", ])
res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(fc), log2(fc)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(pval), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() 

Volcano plot (version3)

res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() +
  xlim(c(-10,10)) + scale_y_sqrt()

DEG table

res %>% dplyr::filter(log2FoldChange != "NA") %>% 
  DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
library(clusterProfiler)
hallmark <- msigdbr::msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, gene_symbol)

perform_GSEA <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$log2FoldChange
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  set.seed(123)
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  result <- x@result %>% arrange(desc(NES))
  result <- result[, c('NES', 'pvalue', 'p.adjust', 'core_enrichment', 'ID')]
  return(result)
}

# Application 
gsea.res = perform_GSEA(res = res, ref = hallmark) 

filtered_gsea = gsea.res %>% mutate(sig= ifelse(pvalue <= 0.05,"p value <= 0.05", "p value > 0.05"))

# Modified GSEA NES plot 
gsea_nes_plot =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
    geom_col(aes(fill=sig), color="grey1", size=0.2) +
    coord_flip() +
    labs(x="Pathway", y="Normalized Enrichment Score",
         title= "GSEA") + 
    theme_classic() +
    # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
    scale_fill_manual(values = c("#FF0000","grey88")) +
    theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
          axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
          axis.title =element_text(size=10)) +ggtitle(title)
}

# Modified GSEA NES plot (p value color version) 
gsea_nes_plot_pval =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
  geom_col(aes(fill=-pvalue), color="grey1", size=0.2) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title= "GSEA") + 
  theme_classic() +
  # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
  scale_fill_gradient(low = "grey88",high ="#FF0000" ) +
  theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
        axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
        axis.title =element_text(size=10),
        legend.title = element_blank()) +ggtitle(title)
}

GSEA NES Plot 1

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot_pval(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

GSEA NES Plot 2

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

IgG vs DMSO

library(DESeq2)
count.mtx = counts
cont = "DMSO"
tret = "IgG"
cols = c(c(1:3), c(7:9))
cond = c(rep(cont,3),
         rep(tret,3))
count.mtx = count.mtx[, cols]
# Generate info table
info <- data.frame(matrix(nrow = ncol(count.mtx), ncol = 2))
colnames(info) <- c('sample', 'cond')
info$sample <- colnames(count.mtx)
info$cond <- cond

# DESeq
dds <- DESeqDataSetFromMatrix(count.mtx, info, ~ cond)
dds <- DESeq(dds)
res <- results(dds)

res <- data.frame(res)

Volcano plot (version1)

fc = 1.5
pval = 0.05

res = res %>% mutate(DE=ifelse(log2FoldChange >= log2(fc) & pvalue < pval, 'UP',
                               ifelse(log2FoldChange <= -log2(fc) & pvalue < pval, 'DN','no_sig')))
res$DE = factor(res$DE, levels = c('UP','DN','no_sig'))

res %>% 
  ggplot(aes(log2FoldChange, -log10(pvalue))) + 
  geom_point(size=1, alpha=0.5) + 
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_classic() +
  geom_vline(xintercept = c(-log2(fc),log2(fc)), color='grey') +
  geom_hline(yintercept = -log10(0.05),color='grey') 

res2= res

Volcano plot (version2)

up = nrow(res[res$DE == "UP", ])
dn = nrow(res[res$DE == "DN", ])
res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(fc), log2(fc)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(pval), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() 

Volcano plot (version3)

res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() +
  xlim(c(-10,10)) + scale_y_sqrt()

DEG table

res %>% dplyr::filter(log2FoldChange != "NA") %>%  
  DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
library(clusterProfiler)
hallmark <- msigdbr::msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, gene_symbol)

perform_GSEA <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$log2FoldChange
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  set.seed(123)
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  result <- x@result %>% arrange(desc(NES))
  result <- result[, c('NES', 'pvalue', 'p.adjust', 'core_enrichment', 'ID')]
  return(result)
}

# Application 
gsea.res = perform_GSEA(res = res, ref = hallmark) 

filtered_gsea = gsea.res %>% mutate(sig= ifelse(pvalue <= 0.05,"p value <= 0.05", "p value > 0.05"))

# Modified GSEA NES plot 
gsea_nes_plot =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
    geom_col(aes(fill=sig), color="grey1", size=0.2) +
    coord_flip() +
    labs(x="Pathway", y="Normalized Enrichment Score",
         title= "GSEA") + 
    theme_classic() +
    # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
    scale_fill_manual(values = c("#FF0000","grey88")) +
    theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
          axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
          axis.title =element_text(size=10)) +ggtitle(title)
}

# Modified GSEA NES plot (p value color version) 
gsea_nes_plot_pval =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
  geom_col(aes(fill=-pvalue), color="grey1", size=0.2) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title= "GSEA") + 
  theme_classic() +
  # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
  scale_fill_gradient(low = "grey88",high ="#FF0000" ) +
  theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
        axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
        axis.title =element_text(size=10),
        legend.title = element_blank()) +ggtitle(title)
}

GSEA NES Plot 1

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot_pval(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

GSEA NES Plot 2

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

DS-1062a vs DMSO

library(DESeq2)
count.mtx = counts
cont = "DMSO"
tret = "DS1062a"
cols = c(c(1:3), c(10:12))
cond = c(rep(cont,3),
         rep(tret,3))
count.mtx = count.mtx[, cols]
# Generate info table
info <- data.frame(matrix(nrow = ncol(count.mtx), ncol = 2))
colnames(info) <- c('sample', 'cond')
info$sample <- colnames(count.mtx)
info$cond <- cond

# DESeq
dds <- DESeqDataSetFromMatrix(count.mtx, info, ~ cond)
dds <- DESeq(dds)
res <- results(dds)

res <- data.frame(res)

Volcano plot (version1)

fc = 1.5
pval = 0.05

res = res %>% mutate(DE=ifelse(log2FoldChange >= log2(fc) & pvalue < pval, 'UP',
                               ifelse(log2FoldChange <= -log2(fc) & pvalue < pval, 'DN','no_sig')))
res$DE = factor(res$DE, levels = c('UP','DN','no_sig'))

res3 =res
res %>% 
  ggplot(aes(log2FoldChange, -log10(pvalue))) + 
  geom_point(size=1, alpha=0.5) + 
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_classic() +
  geom_vline(xintercept = c(-log2(fc),log2(fc)), color='grey') +
  geom_hline(yintercept = -log10(0.05),color='grey')

Volcano plot (version2)

up = nrow(res[res$DE == "UP", ])
dn = nrow(res[res$DE == "DN", ])
res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(fc), log2(fc)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(pval), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw()

Volcano plot (version3)

res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() +
  xlim(c(-10,10)) + scale_y_sqrt()

DEG table

res %>% dplyr::filter(log2FoldChange != "NA") %>% 
  DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
library(clusterProfiler)
hallmark <- msigdbr::msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, gene_symbol)

perform_GSEA <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$log2FoldChange
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  set.seed(123)
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  result <- x@result %>% arrange(desc(NES))
  result <- result[, c('NES', 'pvalue', 'p.adjust', 'core_enrichment', 'ID')]
  return(result)
}

# Application 
gsea.res = perform_GSEA(res = res, ref = hallmark) 

filtered_gsea = gsea.res %>% mutate(sig= ifelse(pvalue <= 0.05,"p value <= 0.05", "p value > 0.05"))

# Modified GSEA NES plot 
gsea_nes_plot =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
    geom_col(aes(fill=sig), color="grey1", size=0.2) +
    coord_flip() +
    labs(x="Pathway", y="Normalized Enrichment Score",
         title= "GSEA") + 
    theme_classic() +
    # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
    scale_fill_manual(values = c("#FF0000","grey88")) +
    theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
          axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
          axis.title =element_text(size=10)) +ggtitle(title)
}

# Modified GSEA NES plot (p value color version) 
gsea_nes_plot_pval =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
  geom_col(aes(fill=-pvalue), color="grey1", size=0.2) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title= "GSEA") + 
  theme_classic() +
  # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
  scale_fill_gradient(low = "grey88",high ="#FF0000" ) +
  theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
        axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
        axis.title =element_text(size=10),
        legend.title = element_blank()) +ggtitle(title)
}

GSEA NES Plot 1

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot_pval(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

GSEA NES Plot 2

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

res.all = list(res1=res1,
               res2= res2,
               res3=res3)
res.all %>% saveRDS(paste0(dir, "data/DMSO_3_comparison_res.all_30348.rds"))